Preparación de datos y análisis preliminares
23 de abril de 2024
Rows: 394.546
Columns: 17
$ sexo <dbl> 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1…
$ dia_nac <dbl> 14, 19, 8, 5, 12, 3, 6, 12, 14, 7, 23, 19, 28, 17, 9, 9, 2…
$ mes_nac <dbl> 7, 6, 12, 1, 3, 4, 11, 7, 9, 1, 2, 12, 2, 12, 9, 9, 6, 2, …
$ ano_nac <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018…
$ semanas <dbl> 40, 38, 40, 38, 40, 39, 33, 39, 39, 35, 38, 38, 39, 40, 38…
$ peso <dbl> 3290, 3660, 3390, 2740, 3080, 2885, 1210, 2890, 3330, 2730…
$ talla <dbl> 50, 49, 51, 48, 47, 47, 40, 48, 49, 47, 46, 51, 50, 48, 47…
$ comuna <dbl> 2201, 2101, 10101, 16101, 5101, 5101, 10101, 10101, 10101,…
$ region <dbl> 2, 2, 10, 16, 5, 5, 10, 10, 10, 16, 5, 5, 12, 10, 12, 12, …
$ edad_madre <dbl> 31, 35, 32, 27, 22, NA, 24, 20, 22, 28, 54, 39, 39, 39, 43…
$ nivel_madre <dbl> 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2…
$ edad_padre <dbl> 34, 42, 35, 29, 28, 32, 24, 19, 31, 30, 42, 44, 29, NA, 52…
$ nivel_padre <dbl> 2, 2, 1, 1, 1, 2, 2, 2, 4, 2, 1, 1, 2, NA, 2, 4, 1, 1, 2, …
$ activ_madre <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0…
$ tipo_parto <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ activ_padre <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, 1, 1, 1, NA, 1, 1, …
$ date_nac <date> 2018-07-14, 2018-06-19, 2018-12-08, 2018-01-05, 2018-03-1…
[1] 394546
[1] 362570
[1] 31976
FALSE TRUE
361975 595
# Excluímos
births <- births %>% filter(semanas>=37)
# Ajustamos valores de variables
births <- births %>%
mutate(semanas=if_else(semanas==99, NA_real_, semanas),
peso=if_else(peso==9999, NA_real_, peso),
sexo=if_else(sexo==9, NA_real_, sexo),
talla=if_else(talla==99, NA_real_, talla),
edad_madre=if_else(edad_madre==99, NA_real_, edad_madre),
nivel_madre=if_else(nivel_madre==9, NA_real_, nivel_madre),
activ_madre=if_else(activ_madre %in% c(3,9), NA_real_, activ_madre),
edad_padre=if_else(edad_padre==99, NA_real_, edad_padre),
nivel_padre=if_else(nivel_padre==9, NA_real_, nivel_padre),
activ_padre=if_else(activ_padre %in% c(3,9), NA_real_, activ_padre),
tipo_parto=if_else(tipo_parto==9, NA_real_, tipo_parto)
)
births <- births[,c(1:9, 15, 10, 11, 14, 12, 13, 16)]
table(is.na(births$semanas))
FALSE TRUE
361922 53
g1 <- ggplot(births, aes(x=semanas)) +
geom_histogram(alpha=0.5, bins=30, fill = "deepskyblue3", color="deepskyblue3") +
scale_x_continuous(breaks=seq(min(births$semanas), max(births$semanas), by=1)) +
labs(x="Semanas de gestación", y="Frecuencia", tag="A)") +
theme_light() +
theme(panel.background = element_rect(fill = "white"),
panel.grid = element_blank(),
legend.position = "top",
legend.background = element_rect(fill = alpha("white", 0.0), color = alpha("white", 0.5)),
strip.background = element_rect(fill="white", color="white"),
strip.text.x = element_text(size=10, hjust = 0, color="black"),
legend.key.width = unit(1.5,"cm"),
plot.tag=element_text(size=16, face="bold"),
axis.line.x = element_blank())
g2 <- ggplot(births, aes(y=semanas)) +
stat_boxplot(geom = "errorbar", width = 0.15, color = 1) +
geom_boxplot(fill = "gray", alpha = 0.5, color = "black", width=0.7,
outlier.colour = 2, outlier.fill = "white", outlier.size = .75) +
scale_y_continuous(breaks=seq(min(births$semanas), max(births$semanas), by=1)) +
labs(y="Semanas de gestación", y="", tag="B)", x="") +
theme_light() +
theme(panel.background = element_rect(fill = "white"),
panel.grid = element_blank(),
legend.position = "top",
legend.background = element_rect(fill = alpha("white", 0.0), color = alpha("white", 0.5)),
strip.background = element_rect(fill="white", color="white"),
strip.text.x = element_text(size=10, hjust = 0, color="black"),
legend.key.width = unit(1.5,"cm"),
plot.tag=element_text(size=16, face="bold"),
axis.line.x = element_blank())
ggpubr::ggarrange(g1, g2, ncol = 2, widths = c(2, 1))Figura 1: Distribución de las semanas gestacionales.
g3 <- ggplot(births, aes(x=peso)) +
geom_histogram(alpha=0.5, bins=150, fill = "deepskyblue3", color="deepskyblue3") +
scale_x_continuous(breaks=seq(0, 10000, by=1000)) +
labs(x="Peso (gr.)", y="Frecuencia", tag="A)") +
theme_light() +
theme(panel.background = element_rect(fill = "white"),
panel.grid = element_blank(),
legend.position = "top",
legend.background = element_rect(fill = alpha("white", 0.0), color = alpha("white", 0.5)),
strip.background = element_rect(fill="white", color="white"),
strip.text.x = element_text(size=10, hjust = 0, color="black"),
legend.key.width = unit(1.5,"cm"),
plot.tag=element_text(size=16, face="bold"),
axis.line.x = element_blank())
g4 <- ggplot(births, aes(y=peso)) +
stat_boxplot(geom = "errorbar", width = 0.15, color = 1) +
geom_boxplot(fill = "gray", alpha = 0.5, color = "black", width=0.7,
outlier.colour = 2, outlier.fill = "white", outlier.size = .75) +
scale_y_continuous(breaks=seq(0, 10000, by=1000)) +
labs(y="Peso (gr.)", y=NULL, tag="B)", x="") +
theme_light() +
theme(panel.background = element_rect(fill = "white"),
panel.grid = element_blank(),
legend.position = "top",
legend.background = element_rect(fill = alpha("white", 0.0), color = alpha("white", 0.5)),
strip.background = element_rect(fill="white", color="white"),
strip.text.x = element_text(size=10, hjust = 0, color="black"),
legend.key.width = unit(1.5,"cm"),
plot.tag=element_text(size=16, face="bold"),
axis.line.x = element_blank())
ggpubr::ggarrange(g3, g4, ncol = 2, widths = c(2, 1))Figura 2: Distribución del peso al nacer.
vars <- c("semanas", "peso")
table <- tibble()
for (i in vars){
# Asegúrate de que i es convertido a simbolo con as.symbol o rlang::sym
des_res <- descriptives(x = !!rlang::sym(i), data = births)
table <- dplyr::bind_rows(table, des_res)
}
table$Variable <- c("Gestación (semanas)", "Peso (gr.)")
flextable::flextable(table)Variable |
Media_Prop |
SD |
Min |
P5 |
P10 |
P25 |
P50 |
P75 |
P90 |
P95 |
Max |
N |
Missing |
Pct_miss |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Gestación (semanas) |
38.756 |
1.026 |
37 |
37 |
38 |
38 |
39 |
40 |
40 |
40 |
44 |
361,922 |
0 |
0 |
Peso (gr.) |
3,383.831 |
434.012 |
1,215 |
2,690 |
2,845 |
3,095 |
3,370 |
3,665 |
3,950 |
4,120 |
5,740 |
361,922 |
9 |
0 |
Figura 3: Análisis de missing data.
births <- births %>% mutate(date_nac = make_date(year = ano_nac, month = mes_nac, day = dia_nac))
nac <- births %>%
group_by(ano_nac, mes_nac) %>%
summarise(n=n(),
peso_mean=mean(peso, na.rm = TRUE),
peso_median=median(peso, na.rm = TRUE),
peso_min=min(peso, na.rm = TRUE),
peso_max=max(peso, na.rm = TRUE),
) %>%
mutate(date_nac=make_date(year=ano_nac, month=mes_nac))
g1 <- ggplot(nac, aes(x = date_nac, y = n)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
#facet_wrap(~ ano_nac, scales = "free_x", ncol=2) +
labs(x = "Fecha", y = "Cantidad de nacimientos", tag="A.", title="Número de nacimientos") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g2 <- ggplot(nac, aes(x = date_nac, y = peso_mean)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
#facet_wrap(~ ano_nac, scales = "free_x", ncol=2) +
labs(x = "Fecha", y = "Peso al nacer (gr.)", tag="B.", title="Peso promedio") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g3 <- ggplot(nac, aes(x = date_nac, y = peso_min)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
#facet_wrap(~ ano_nac, scales = "free_x", ncol=2) +
labs(x = "Fecha", y = "Peso al nacer", tag="A.", title="Peso mínimo") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g4 <- ggplot(nac, aes(x = date_nac, y = peso_max)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
#facet_wrap(~ ano_nac, scales = "free_x", ncol=2) +
labs(x = "Fecha", y = "Peso al nacer (gr.)", tag="B.", title="Peso máximo") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
ggpubr::ggarrange(g1, g2, g3, g4, ncol = 2, nrow = 2)Figura 4: Distribución de peso al nacer en el tiempo.
comunas <- chilemapas::codigos_territoriales %>%
mutate(codigo_comuna=as.numeric(codigo_comuna),
codigo_provincia=as.numeric(codigo_provincia),
codigo_region=as.numeric(codigo_region)
)
births <- births %>%
left_join(comunas, by=c("comuna"="codigo_comuna")) %>%
mutate(zona = case_when(
nombre_region %in% c("Arica y Parinacota", "Tarapacá", "Antofagasta", "Atacama", "Coquimbo") ~ "Norte",
nombre_region %in% c("Valparaiso", "Metropolitana de Santiago", "Libertador General Bernardo O'Higgins", "Maule", "Nuble", "Biobio") ~ "Centro",
nombre_region %in% c("La Araucanía", "Los Ríos", "Los Lagos", "Aysen del General Carlos Ibanez del Campo", "Magallanes y de la Antartica Chilena") ~ "Sur",
TRUE ~ NA_character_
))
rm(comunas)
nac2 <- births %>%
group_by(zona, ano_nac, mes_nac) %>%
summarise(n=n(),
peso_mean=mean(peso, na.rm = TRUE),
peso_median=median(peso, na.rm = TRUE),
peso_min=min(peso, na.rm = TRUE),
peso_max=max(peso, na.rm = TRUE),
) %>%
mutate(date_nac=make_date(year=ano_nac, month=mes_nac)) %>%
mutate(zona=factor(zona, levels=c("Norte", "Centro", "Sur")))
g1 <- ggplot(nac2, aes(x = date_nac, y = n, color=zona)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
labs(x = "Fecha", y = "Cantidad de nacimientos", tag="A.", title="Número de nacimientos") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g2 <- ggplot(nac2, aes(x = date_nac, y = peso_mean, color=zona)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
labs(x = "Fecha", y = "Peso al nacer (gr.)", tag="B.", title="Peso promedio") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
ggpubr::ggarrange(g1, g2, ncol = 2, nrow = 1)Figura 5: Distribución de peso al nacer en el tiempo y zona.
temperatura <- temp %>%
group_by(zona, fecha) %>%
summarise(n=n(),
temp_mean=mean(xt_diario, na.rm = TRUE),
temp_median=median(xt_diario, na.rm = TRUE),
temp_min=mean(t_min, na.rm = TRUE),
temp_max=mean(t_max, na.rm = TRUE),
hum_mean=mean(xh_diario, na.rm = TRUE),
hum_median=median(xh_diario, na.rm = TRUE),
hum_min=mean(h_min, na.rm = TRUE),
hum_max=mean(h_max, na.rm = TRUE),
) %>%
drop_na()
g1 <- ggplot(temperatura, aes(x = fecha, y = temp_mean, color=zona)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
scale_y_continuous(limits = c(0,40), n.breaks = 5) +
labs(x = "Fecha", y = "Temperatura (º C.)", tag="A.", title="Promedio") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g2 <- ggplot(temperatura, aes(x = fecha, y = temp_min, color=zona)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
scale_y_continuous(limits = c(0,40), n.breaks = 5) +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
labs(x = "Fecha", y = "Temperatura (º C.)", tag="A.", title="Mínima") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g3 <- ggplot(temperatura, aes(x = fecha, y = temp_max, color=zona)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
scale_y_continuous(limits = c(0,40), n.breaks = 5) +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
labs(x = "Fecha", y = "Temperatura (º C.)", tag="A.", title="Máxima") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
ggpubr::ggarrange(g1, g2, g3, ncol = 3, nrow = 1)Figura 6: Temperatura en el tiempo y zona.
g1 <- ggplot(temperatura, aes(x = fecha, y = hum_mean, color=zona)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
scale_y_continuous(limits = c(0,100), n.breaks = 5) +
labs(x = "Fecha", y = "Humedad (%)", tag="A.", title="Promedio") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g2 <- ggplot(temperatura, aes(x = fecha, y = hum_min, color=zona)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
scale_y_continuous(limits = c(0,100), n.breaks = 5) +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
labs(x = "Fecha", y = "Humedad (%)", tag="A.", title="Mínima") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g3 <- ggplot(temperatura, aes(x = fecha, y = hum_max, color=zona)) +
geom_line() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
scale_y_continuous(limits = c(0,100), n.breaks = 5) +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
labs(x = "Fecha", y = "Humedad (%)", tag="A.", title="Máxima") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
ggpubr::ggarrange(g1, g2, g3, ncol = 3, nrow = 1)Figura 7: Humedad en el tiempo y zona.
Data de nacimiento
OB1: Expandir la base de datos con semanas gestacionales.
# A tibble: 1 × 6
sexo semanas peso talla comuna date_nac
<dbl> <dbl> <dbl> <dbl> <dbl> <date>
1 1 40 3290 50 2201 2018-07-14
# A tibble: 40 × 10
sexo semanas peso talla comuna date_nac week_gest week_gest_num
<dbl> <dbl> <dbl> <dbl> <dbl> <date> <date> <dbl>
1 1 40 3290 50 2201 2018-07-14 2017-10-14 1
2 1 40 3290 50 2201 2018-07-14 2017-10-21 2
3 1 40 3290 50 2201 2018-07-14 2017-10-28 3
4 1 40 3290 50 2201 2018-07-14 2017-11-04 4
5 1 40 3290 50 2201 2018-07-14 2017-11-11 5
6 1 40 3290 50 2201 2018-07-14 2017-11-18 6
7 1 40 3290 50 2201 2018-07-14 2017-11-25 7
8 1 40 3290 50 2201 2018-07-14 2017-12-02 8
9 1 40 3290 50 2201 2018-07-14 2017-12-09 9
10 1 40 3290 50 2201 2018-07-14 2017-12-16 10
# ℹ 30 more rows
# ℹ 2 more variables: date_start_week <date>, date_end_week <date>
# Tiempo de ejecución 1,6 mins.
births_weeks <- births %>%
drop_na(date_nac, semanas) %>%
mutate(id=1:n()) %>%
mutate(date_start = date_nac - weeks(semanas-2),
date_start = date_start - weeks(1),
date_end = date_nac) %>%
rowwise() %>%
mutate(week_gest = list(seq.Date(date_start, date_end, by = "week"))) %>%
unnest(week_gest) %>%
group_by(id) %>%
mutate(week_gest_num = paste0(abs(semanas - row_number())),
week_gest_num = (semanas) - as.numeric(week_gest_num),
date_start_week = (week_gest - (7 * abs(week_gest_num - row_number()))) - weeks(1),
date_end_week = week_gest - (7 * abs(week_gest_num - row_number()))
)
group_by(id) %>%
distinct(week_gest_num, .keep_all = TRUE) %>%
arrange(id, week_gest_num) %>%
ungroup() Rows: 15.114.154
Columns: 24
$ sexo <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ dia_nac <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14…
$ mes_nac <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, …
$ ano_nac <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, …
$ semanas <dbl> 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40…
$ peso <dbl> 3290, 3290, 3290, 3290, 3290, 3290, 3290, 3290, 3290, …
$ talla <dbl> 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50…
$ comuna <dbl> 2201, 2201, 2201, 2201, 2201, 2201, 2201, 2201, 2201, …
$ region <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ edad_madre <dbl> 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31…
$ nivel_madre <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ edad_padre <dbl> 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34…
$ nivel_padre <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ activ_madre <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ tipo_parto <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ activ_padre <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ date_nac <date> 2018-07-14, 2018-07-14, 2018-07-14, 2018-07-14, 2018-…
$ id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ date_start <date> 2017-10-14, 2017-10-14, 2017-10-14, 2017-10-14, 2017-…
$ date_end <date> 2018-07-14, 2018-07-14, 2018-07-14, 2018-07-14, 2018-…
$ week_gest <date> 2017-10-14, 2017-10-21, 2017-10-28, 2017-11-04, 2017-…
$ week_gest_num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
$ date_start_week <date> 2017-10-07, 2017-10-14, 2017-10-21, 2017-10-28, 2017-…
$ date_end_week <date> 2017-10-14, 2017-10-21, 2017-10-28, 2017-11-04, 2017-…
Agregar calculos para temperatura y humedad según semana gestacional:
calculate_temperature_stats <- function(start_date, end_date, comuna_id, temp_data) {
# Asegurar que temp_data es un data.table
setDT(temp_data)
# Filtrar los datos de temperatura dentro del rango de fechas y por comuna específica usando data.table
week_temperatures <- temp_data[fecha >= start_date & fecha < end_date & comuna == comuna_id,
.(t_min, t_max, xt_diario)]
# Si no hay datos, retornar NA para cada estadística
if (nrow(week_temperatures) == 0) {
return(as.data.table(list(temp_mean=NA, temp_min=NA, temp_max=NA,
temp_mean_min=NA, temp_mean_max=NA,
temp_q05=NA, temp_q10=NA, temp_q20=NA, temp_q30=NA,
temp_q40=NA, temp_q50=NA, temp_q60=NA, temp_q70=NA,
temp_q80=NA, temp_q90=NA, temp_q95=NA)))
}
# Calcular y retornar estadísticas usando data.table
return(list(
temp_mean = mean(week_temperatures$xt_diario, na.rm = TRUE),
temp_min = min(week_temperatures$t_min, na.rm = TRUE),
temp_max = max(week_temperatures$t_max, na.rm = TRUE),
temp_mean_min = mean(week_temperatures$t_min, na.rm = TRUE),
temp_mean_max = mean(week_temperatures$t_max, na.rm = TRUE),
temp_q05 = quantile(week_temperatures$xt_diario, probs = 0.05, na.rm = TRUE),
temp_q10 = quantile(week_temperatures$xt_diario, probs = 0.1, na.rm = TRUE),
temp_q20 = quantile(week_temperatures$xt_diario, probs = 0.2, na.rm = TRUE),
temp_q30 = quantile(week_temperatures$xt_diario, probs = 0.3, na.rm = TRUE),
temp_q40 = quantile(week_temperatures$xt_diario, probs = 0.4, na.rm = TRUE),
temp_q50 = quantile(week_temperatures$xt_diario, probs = 0.5, na.rm = TRUE),
temp_q60 = quantile(week_temperatures$xt_diario, probs = 0.6, na.rm = TRUE),
temp_q70 = quantile(week_temperatures$xt_diario, probs = 0.7, na.rm = TRUE),
temp_q80 = quantile(week_temperatures$xt_diario, probs = 0.8, na.rm = TRUE),
temp_q90 = quantile(week_temperatures$xt_diario, probs = 0.9, na.rm = TRUE),
temp_q95 = quantile(week_temperatures$xt_diario, probs = 0.95, na.rm = TRUE)
))
}process_chunk_t <- function(chunk) {
# Aplicar la función calculate_temperature_stats a cada fila de este chunk
chunk[, c("temp_mean", "temp_min", "temp_max", "temp_mean_min", "temp_mean_max",
"temp_q05", "temp_q10", "temp_q20", "temp_q30", "temp_q40", "temp_q50",
"temp_q60", "temp_q70", "temp_q80", "temp_q90", "temp_q95") :=
calculate_temperature_stats(date_start_week, date_end_week, comuna, temp),
by = .(date_start_week, date_end_week, comuna)]
return(chunk)
}gen_bt_data <- function(input, id_start, id_end, data_out){
stime <- Sys.time()
for(i in id_start:id_end){
results <- future.apply::future_lapply(input[i], process_chunk_t)
results <- rbindlist(results)
# Save results
save(results, file=paste0(data_out, "temp_data/","births_temp", "_id", i, ".RData"))
}
etime <- Sys.time()
t1 <- etime - stime
print(t1) # Time execution
}
# Aplicamos la función
gen_bt_data(input = parts, id_start = 1, id_end = 1000, data_out = "Data/Output/")Tiempo de ejecusión: 30 minutos aproximadamente.
Rows: 38.288
Columns: 40
$ sexo <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ dia_nac <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14…
$ mes_nac <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, …
$ ano_nac <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, …
$ semanas <dbl> 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40…
$ peso <dbl> 3290, 3290, 3290, 3290, 3290, 3290, 3290, 3290, 3290, …
$ talla <dbl> 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50…
$ comuna <dbl> 2201, 2201, 2201, 2201, 2201, 2201, 2201, 2201, 2201, …
$ region <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ edad_madre <dbl> 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31…
$ nivel_madre <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ edad_padre <dbl> 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34…
$ nivel_padre <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ activ_madre <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ tipo_parto <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ activ_padre <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ date_nac <date> 2018-07-14, 2018-07-14, 2018-07-14, 2018-07-14, 2018-…
$ id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ date_start <date> 2017-10-14, 2017-10-14, 2017-10-14, 2017-10-14, 2017-…
$ date_end <date> 2018-07-14, 2018-07-14, 2018-07-14, 2018-07-14, 2018-…
$ week_gest <date> 2017-10-14, 2017-10-21, 2017-10-28, 2017-11-04, 2017-…
$ week_gest_num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
$ date_start_week <date> 2017-10-07, 2017-10-14, 2017-10-21, 2017-10-28, 2017-…
$ date_end_week <date> 2017-10-14, 2017-10-21, 2017-10-28, 2017-11-04, 2017-…
$ temp_mean <dbl> 13,54061, 17,85728, 15,74443, 13,28057, 15,49821, 16,5…
$ temp_min <dbl> -0,8, 5,1, 2,1, 0,2, 3,4, 4,5, 1,8, 1,8, 2,4, 3,2, 4,5…
$ temp_max <dbl> 24,3, 29,0, 26,3, 24,0, 25,8, 25,8, 25,5, 25,1, 25,8, …
$ temp_mean_min <dbl> 1,900000, 7,657143, 4,557143, 1,983333, 4,714286, 7,22…
$ temp_mean_max <dbl> 23,31429, 26,91429, 24,91429, 22,67143, 24,45714, 25,1…
$ temp_q05 <dbl> 12,81529, 17,14458, 15,18458, 12,08333, 14,65750, 15,6…
$ temp_q10 <dbl> 12,86808, 17,20583, 15,22333, 12,13333, 14,81500, 15,8…
$ temp_q20 <dbl> 12,94410, 17,31400, 15,30833, 12,40800, 15,02917, 16,2…
$ temp_q30 <dbl> 12,96103, 17,39350, 15,40833, 13,03200, 15,04167, 16,2…
$ temp_q40 <dbl> 13,07833, 17,46533, 15,48667, 13,28000, 15,09083, 16,3…
$ temp_q50 <dbl> 13,24583, 17,53333, 15,55417, 13,34000, 15,15833, 16,6…
$ temp_q60 <dbl> 13,52583, 17,97333, 15,55667, 13,66640, 15,45583, 16,8…
$ temp_q70 <dbl> 13,77250, 18,30256, 15,71067, 13,88470, 15,82833, 16,9…
$ temp_q80 <dbl> 13,95250, 18,41026, 16,16767, 13,88680, 16,35083, 17,2…
$ temp_q90 <dbl> 14,46583, 18,65329, 16,55840, 14,08417, 16,54667, 17,3…
$ temp_q95 <dbl> 14,80583, 18,80865, 16,73720, 14,23167, 16,56292, 17,3…
df_final <- df_final %>%
mutate(zona = case_when(
comuna %in% c(2101, 2201, 15101) ~ "Norte",
comuna %in% c(5101, 5104, 5201, 5606,
7301, 8101, 8301, 13113,
13124, 13126, 16101) ~ "Centro",
comuna %in% c(10101, 10301, 10402,
10404, 11101, 11201,
11301, 11401, 12101,
12301, 12401
) ~ "Sur",
TRUE ~ NA_character_
)) %>%
mutate(zona=factor(zona, levels=c("Norte", "Centro", "Sur")))
g1 <- df_final %>%
drop_na(zona) %>%
ggplot(aes(x=temp_mean, fill=zona)) +
geom_histogram(bins=150, alpha=0.5) +
scale_x_continuous(breaks=seq(-10, 35, by=5), limits = c(-10, 35)) +
scale_fill_discrete(name="") +
labs(x="Temperatura media", y="Frecuencia", tag="A.", title="Promedio") +
#facet_wrap(~ zona, scales = "f", ncol=1) +
theme_light() +
theme(panel.background = element_rect(fill = "white"),
panel.grid = element_blank(),
legend.position = "top",
legend.background = element_rect(fill = alpha("white", 0.0), color = alpha("white", 0.5)),
strip.background = element_rect(fill="white", color="white"),
strip.text.x = element_text(size=10, hjust = 0, color="black"),
legend.key.width = unit(1.5,"cm"),
plot.tag=element_text(size=16, face="bold"),
axis.line.x = element_blank())
g2 <- df_final %>%
drop_na(zona) %>%
ggplot(aes(x=temp_mean_min, fill=zona)) +
geom_histogram(bins=150, alpha=0.5) +
scale_x_continuous(breaks=seq(-10, 35, by=5), limits = c(-10, 35)) +
scale_fill_discrete(name="") +
labs(x="Temperatura media", y="Frecuencia", tag="B.", title="Mínimo") +
#facet_wrap(~ zona, scales = "fixed", ncol=1) +
theme_light() +
theme(panel.background = element_rect(fill = "white"),
panel.grid = element_blank(),
legend.position = "top",
legend.background = element_rect(fill = alpha("white", 0.0), color = alpha("white", 0.5)),
strip.background = element_rect(fill="white", color="white"),
strip.text.x = element_text(size=10, hjust = 0, color="black"),
legend.key.width = unit(1.5,"cm"),
plot.tag=element_text(size=16, face="bold"),
axis.line.x = element_blank())
g3 <- df_final %>%
drop_na(zona) %>%
ggplot(aes(x=temp_mean_max, fill=zona)) +
geom_histogram(bins=150, alpha=0.5) +
scale_x_continuous(breaks=seq(-10, 35, by=5), limits = c(-10, 35)) +
scale_fill_discrete(name="") +
labs(x="Temperatura media", y="Frecuencia", tag="C.", title="Máximo") +
#facet_wrap(~ zona, scales = "fixed", ncol=1) +
theme_light() +
theme(panel.background = element_rect(fill = "white"),
panel.grid = element_blank(),
legend.position = "top",
legend.background = element_rect(fill = alpha("white", 0.0), color = alpha("white", 0.5)),
strip.background = element_rect(fill="white", color="white"),
strip.text.x = element_text(size=10, hjust = 0, color="black"),
legend.key.width = unit(1.5,"cm"),
plot.tag=element_text(size=16, face="bold"),
axis.line.x = element_blank())
ggpubr::ggarrange(g1, g2, g3, ncol = 3, nrow = 1, common.legend = TRUE)Figura 8: Distribución de temperatura.
temperatura <- df_final %>%
group_by(zona, week_gest_num) %>%
summarise(temp_mean=mean(temp_mean, na.rm = TRUE),
temp_min=mean(temp_mean_min, na.rm = TRUE),
temp_max=mean(temp_mean_max, na.rm = TRUE)
) %>%
drop_na()
g1 <- ggplot(temperatura, aes(x = week_gest_num, y = temp_mean, color=zona)) +
geom_point(size=0.5) +
geom_line() +
scale_x_continuous(limits = c(1,max(temperatura$week_gest_num)), n.breaks = 10) +
scale_y_continuous(limits = c(0,25), n.breaks = 5) +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
labs(x = "Semana gestacional", y = "Temperatura (º C.)", tag="A.", title="Promedio") +
theme_light() +
theme(legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g2 <- ggplot(temperatura, aes(x = week_gest_num, y = temp_min, color=zona)) +
geom_point(size=0.5) +
geom_line() +
scale_x_continuous(limits = c(1,max(temperatura$week_gest_num)), n.breaks = 10) +
scale_y_continuous(limits = c(0,25), n.breaks = 5) +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
labs(x = "Semana gestacional", y = "Temperatura (º C.)", tag="B.", title="Mínimo") +
theme_light() +
theme(legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
g3 <- ggplot(temperatura, aes(x = week_gest_num, y = temp_max, color=zona)) +
geom_point(size=0.5) +
geom_line() +
scale_x_continuous(limits = c(1,max(temperatura$week_gest_num)), n.breaks = 10) +
scale_y_continuous(limits = c(0,25), n.breaks = 5) +
facet_wrap(~ zona, scales = "free_x", ncol=1) +
labs(x = "Semana gestacional", y = "Temperatura (º C.)", tag="C.", title="Máximo") +
theme_light() +
theme(legend.title = element_blank(),
legend.position = "none",
strip.background = element_blank(),
strip.text = element_text(color="black"))
ggpubr::ggarrange(g1, g2, g3, ncol = 3, nrow = 1)Figura 9: Temperatura según semanas gestacionales
Construcción de los datos
Análisis descriptivo
Modelo GAM 1
Modelo GAM 2
Repetir análisis para humedad. Futuro cercano…Modelos DLNM.
Repositorio en Github: https://github.com/JDConejeros/CIIIA-ClimateBirthWeightAnalysis
Preparación de datos y análisis preliminares
© José Daniel Conejeros | jdconejeros@uc.cl | JDConejeros
Proyecto Redes | Hecho en Quarto
© jdconejeros@uc.cl | JDConejeros | Proyecto Redes | Hecho en Quarto